import os
import sys
sys.path.append(os.path.abspath(os.path.dirname(__file__) + '/' + '../..'))

import numpy as np
import pandas as pd

from python.tools import (
    clean_folder
)

from python.construct_datasets.functions import (
    get_inflation_forecasts
)

################
## Parameters ##
################

# All paths are relative to the current working directory, so the script
# has to be launched from the folder that contains 'construct_datasets'.

# Excel workbook with the individual PGDP forecasts (read with pd.read_excel)
file_name_ind_forecasts = './construct_datasets/input/individual_pgdp.xlsx'
# Excel workbook with realized values; the 'DATA' sheet is read and its
# 'Date', 'First' and 'Second' columns are used
file_name_actuals = './construct_datasets/input/p_first_second_third.xlsx'
# Stata file with quarterly Romer-Romer monetary shocks
file_name_RR = './construct_datasets/input/RR_monetary_shock_quarterly.dta'
# CSV with the Federal Funds Rate ('date' and 'value' columns are used)
file_name_FFR = './construct_datasets/input/DFF.csv'
# CSV with the NBER recession indicator ('date' and 'value' columns are used)
file_name_NBER = './construct_datasets/input/USRECQ.csv'
# Folder that receives consensus_dataset.csv; passed to clean_folder first
output_folder = './construct_datasets/output/consensus_dataset'

#######################
## Construct dataset ##
#######################

# Prepare the output folder before anything is written to it
clean_folder(output_folder)

## Calculate consensus forecasts

df_ind = pd.read_excel(file_name_ind_forecasts)
df_actuals = pd.read_excel(file_name_actuals, sheet_name = 'DATA', skiprows = 4)
# Work on a copy so the raw individual forecasts in df_ind stay untouched
df = df_ind.copy()

# Create date variable from strings of the form '<YEAR>Q<QUARTER>'
df['DATE'] = pd.to_datetime(df['YEAR'].map(str) + 'Q' + df['QUARTER'].map(str))

# Calculate consensus forecasts for GDP deflator index: the median over
# all rows that share a survey date, for each of PGDP1..PGDP6.
# The forecast columns are selected *before* aggregating so that only
# they are passed to median(); aggregating every column first fails on
# pandas >= 2.0 as soon as the workbook contains a non-numeric column.
var_names = ['PGDP{}'.format(ii) for ii in range(1, 6 + 1)]
df = df.groupby('DATE')[var_names].median().reset_index()

# Construct inflation forecasts
df = get_inflation_forecasts(df, melt_id_vars = 'DATE')

# Data for 1995:Q4 is missing for first release, use second release
is_1995q4 = df_actuals['Date'] == '1995:Q4'
df_actuals.loc[is_1995q4, 'First'] = df_actuals.loc[is_1995q4, 'Second']
df_actuals = df_actuals.rename(columns = {'Date': 'DATE',
                                          'First': 'Realiz1'})
# Turn 'YYYY:QN' labels into 'YYYY:0N'. The vectorized string accessor
# leaves missing dates as NaN instead of raising AttributeError on them
# (e.g. blank rows in the spreadsheet); regex = False keeps 'Q' a literal.
df_actuals['DATE'] = df_actuals['DATE'].str.replace('Q', '0', regex = False)

# Merge forecasts and actuals. Left join: every forecast row is kept and
# Realiz1 is NaN where no realized value exists for that date.
# NOTE(review): this assumes get_inflation_forecasts returns DATE in the
# same 'YYYY:0N' string form -- confirm against its implementation.
df = pd.merge(df, df_actuals[['DATE', 'Realiz1']], on = 'DATE', how = 'left')

## Merge-in additional variables
# Load the three auxiliary series; each one is reshaped below so that it
# carries a 'DATE' key of the form 'YYYY:0Q'.
df_RR  = pd.read_stata(file_name_RR)
df_FFR = pd.read_csv(file_name_FFR)
df_NBER = pd.read_csv(file_name_NBER)

# Romer-Romer shocks
# Build the DATE merge key from the year and quarter of 'date'
rr_dates = df_RR['date']
rr_year = rr_dates.dt.year.map(str)
rr_quarter = rr_dates.dt.quarter.map(str)
df_RR['DATE'] = rr_year + ':0' + rr_quarter
# Keep only the 'resid_full' series (exposed as RR_shocks) next to DATE
df_RR = df_RR.drop(columns = ['resid', 'resid_romer', 'date'])
df_RR = df_RR.rename(columns = {'resid_full': 'RR_shocks'})

# Federal Funds Rate -- aggregate to quarterly frequency (mean of all
# observations within each calendar quarter) and prepare DATE column
# ('YYYY:0Q') for merging.
# .copy() makes the column assignment below act on an independent frame
# rather than on a slice of the raw CSV data (no SettingWithCopyWarning).
df_FFR = df_FFR[['date', 'value']].copy()
df_FFR['date'] = pd.to_datetime(df_FFR['date'])
# Group by calendar-quarter period instead of resample('Q'): the 'Q'
# offset alias is deprecated in recent pandas (renamed to 'QE'), while
# the 'Q' period frequency works in both old and new versions.
# Quarters without any observation produce no row here (resample gave a
# NaN row); after the left merge further below both end up as NaN.
ffr_quarter = df_FFR['date'].dt.to_period('Q')
ffr_mean = df_FFR['value'].groupby(ffr_quarter).mean()
df_FFR = pd.DataFrame(
    {
        'FFR': ffr_mean.values,
        'DATE': ['{}:0{}'.format(qq.year, qq.quarter) for qq in ffr_mean.index],
    },
    columns = ['FFR', 'DATE'],
)

# NBER recession dates. Prepare DATE
# column ('YYYY:0Q') for merging.
# .copy() makes the column assignment below act on an independent frame
# rather than on a slice of the raw CSV data (no SettingWithCopyWarning).
df_NBER = df_NBER[['date', 'value']].copy()
nber_dates = pd.to_datetime(df_NBER['date'])
# One pass over the timestamps builds the key from year and quarter
df_NBER['DATE'] = ['{}:0{}'.format(ts.year, ts.quarter) for ts in nber_dates]
# Drop the raw date and expose the indicator as NBER_recession
df_NBER = df_NBER.drop(columns = 'date')
df_NBER = df_NBER.rename(columns = {'value': 'NBER_recession'})

# Merge data: left-join each auxiliary table onto the forecasts by DATE,
# so every forecast row is kept and unmatched dates get NaN
for df_extra in (df_RR, df_FFR, df_NBER):
    df = df.merge(df_extra, on = 'DATE', how = 'left')

# Save output to file (row index is not written)
output_file = '{}/consensus_dataset.csv'.format(output_folder)
df.to_csv(output_file, index = False)